{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([0.80199222, 0.69477733, 0.83000436, 0.60975194, 0.55430339,\n",
       "        0.4454938 , 0.48716133, 0.41699328, 0.26842395, 0.59417058]),\n",
       " [[1], [1], [1], [1], [1], [1], [1], [1], [1], [1]])"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import numpy as np\n",
    "\n",
    "#每个老虎机的中奖概率,0-1之间的均匀分布\n",
    "probs = np.random.uniform(size=10)\n",
    "\n",
    "#记录每个老虎机的返回值\n",
    "rewards = [[1] for _ in range(10)]\n",
    "\n",
    "probs, rewards"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import random\n",
    "\n",
    "\n",
    "#贪婪算法\n",
    "def choose_one():\n",
    "    #有小概率随机选择一根拉杆\n",
    "    if random.random() < 0.01:\n",
    "        return random.randint(0, 9)\n",
    "\n",
    "    #计算每个老虎机的奖励平均\n",
    "    rewards_mean = [np.mean(i) for i in rewards]\n",
    "\n",
    "    #选择期望奖励估值最大的拉杆\n",
    "    return np.argmax(rewards_mean)\n",
    "\n",
    "\n",
    "choose_one()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "[[1, 1], [1], [1], [1], [1], [1], [1], [1], [1], [1]]"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def try_and_play():\n",
    "    i = choose_one()\n",
    "\n",
    "    #玩老虎机,得到结果\n",
    "    reward = 0\n",
    "    if random.random() < probs[i]:\n",
    "        reward = 1\n",
    "\n",
    "    #记录玩的结果\n",
    "    rewards[i].append(reward)\n",
    "\n",
    "\n",
    "try_and_play()\n",
    "\n",
    "rewards"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 312
    },
    "executionInfo": {
     "elapsed": 676,
     "status": "ok",
     "timestamp": 1649954384006,
     "user": {
      "displayName": "Sam Lu",
      "userId": "15789059763790170725"
     },
     "user_tz": -480
    },
    "id": "wIHh_wRA8YDz",
    "outputId": "d5d65ff2-744d-44e2-ec8a-eb78d13397c2"
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(4150.021823075759, 4077)"
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def get_result():\n",
    "    #玩N次\n",
    "    for _ in range(5000):\n",
    "        try_and_play()\n",
    "\n",
    "    #期望的最好结果\n",
    "    target = probs.max() * 5000\n",
    "\n",
    "    #实际玩出的结果\n",
    "    result = sum([sum(i) for i in rewards])\n",
    "\n",
    "    return target, result\n",
    "\n",
    "\n",
    "get_result()"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "第2章-多臂老虎机问题.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}
